This walkthrough describes the population genetics analysis of SNPs generated from Orbicella faveolata samples collected across urbanized and reef habitats in southeast Florida. Sequence alignments are based on an existing genome assembly by Young and colleagues: https://doi.org/10.1186/s12864-024-10092-w. For initial processing of 2bRAD reads regardless of species, and the species-specific 2bRAD pipeline, please see below:
Library prep, bioinformatics, and analysis protocols are credited to the 2bRAD pipeline originally developed by Misha Matz: https://doi.org/10.1038/nmeth.2023, and further refined by Ryan Eckert: https://ryaneckert.github.io/ofav_FKNMS_PopGen/code/
For the following analyses we will require the use of a number of different R packages. We can use the following code to quickly load in the packages and install any packages not previously installed in the R console.
if (!require("pacman")) install.packages("pacman")
pacman::p_load_gh("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis", "ropensci/rnaturalearthhires", "KarstensLab/microshades")
pacman::p_load("cowplot", "car", "ggrepel", "ggspatial", "paletteer", "patchwork", "rgdal", "rnaturalearth", "sf", "Hmisc", "MCMC.OTU", "pairwiseAdonis", "RColorBrewer", "Redmonder", "flextable", "lubridate", "officer", "adegenet", "dendextend", "gdata", "ggdendro", "hierfstat", "kableExtra", "poppr", "reshape2", "StAMPP", "vcfR", "vegan", "boa", "magick", "rgeos", "sdmpredictors", "ggcorrplot", "tidyverse", "TeachingDemos", "LaplacesDemon", "adespatial", "ggnewscale", "ggbeeswarm", "multcomp", "rstatix", "R.utils", "graph4lg")
options("scipen" = 10)
Making color palettes to use throughout all plots
Pal <- c("#E69F00","#009E73","#D55E00","#CC79A7","#E41A1C","#377EB8","#FF7F00","#A65628","#FC8D62","#8DA0CB","#E78AC3","#A6D854","#FFD92F")
kColPal <- c("cornflowerblue","orange","purple4","grey50")
Analyzing 2bRAD generated SNPs (20,323 loci) for population structure/genetic connectivity across urbanized and reef sites in southeast Florida
Identification of any natural clones using technical replicates as a baseline for clonality between samples
pacman::p_load("dendextend", "ggdendro", "tidyverse")
cloneBams = read.csv("../../data/ofav/ofavMetadata.csv") # list of bam files
cloneMa = as.matrix(read.table("../../data/ofav/ANGSD/clones/ofavClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(cloneMa) = list(cloneBams[,2],cloneBams[,2])
clonesHc = hclust(as.dist(cloneMa),"ave")
clonePops = cloneBams$region
cloneSite = cloneBams$site
cloneDend = cloneMa %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
cloneDData = cloneDend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
cloneDData$segments$yend2 = cloneDData$segments$yend
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend2[i] == 0) {
cloneDData$segments$yend2[i] = (cloneDData$segments$y[i] - 0.01)}}
cloneDendPoints = cloneDData$labels
cloneDendPoints$region = clonePops[order.dendrogram(cloneDend)]
cloneDendPoints$site=cloneSite[order.dendrogram(cloneDend)]
rownames(cloneDendPoints) = cloneDendPoints$label
cloneDendPoints$region = as.factor(cloneDendPoints$region)
cloneDendPoints$site = as.factor(cloneDendPoints$site)
# Making points at the leaves to place symbols for regions
point = as.vector(NA)
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend[i] == 0) {
point[i] = cloneDData$segments$y[i] - 0.01
} else {
point[i] = NA}}
cloneDendPoints$y = point[!is.na(point)]
techReps = c("urban_080", "urban_080_2", "urban_080_3", "urban_289", "urban_289_2")
cloneDendPoints$site = factor(cloneDendPoints$site,levels(cloneDendPoints$site)[c(3, 10, 4, 2, 12, 8, 11, 9, 5, 7, 6, 1, 13)])
cloneDendPoints$region = factor(cloneDendPoints$region,levels(cloneDendPoints$region)[c(2, 3, 4, 5, 1)])
cloneDendA = ggplot() +
geom_segment(data = segment(cloneDData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = cloneDendPoints, aes(x = x, y = y, fill = site, shape = region), size = 4, stroke = 0.25) +
# scale_fill_brewer(palette = "Dark2", name = "Site") +
scale_fill_manual(values = Pal, name= "Site")+
scale_shape_manual(values = c(21, 22, 23, 24, 25), name = "Region")+
geom_hline(yintercept = 0.12, color = "red", lty = 5, size = 0.75) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(cloneDendPoints, subset = label %in% techReps), aes(x = x, y = (y - .045), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(cloneDendPoints, subset = !label %in% techReps), aes(x = x, y = (y - .025), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
theme_classic()
cloneDend = cloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
cloneDend
ggsave("../../figures/ofav/cloneDend.pdf", plot = cloneDend, height = 8, width = 24, units = "in", dpi = 300)
ggsave("../../figures/ofav/cloneDend.png", plot = cloneDend, height = 8, width = 24, units = "in", dpi = 300)
We removed the technical replicates/clones and re-ran ANGSD for all the following analyses
pacman::p_load("dendextend", "ggdendro", "tidyverse")
bams = read.csv("../../data/ofav/ofavMetadata.csv")[-c(11,14,22,32:34,54,59,72),] # list of bams files and their populations (tech reps removed)
ma = as.matrix(read.table("../../data/ofav/ANGSD/ofavNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(ma) = list(bams[,2],bams[,2])
Hc = hclust(as.dist(ma),"ave")
Pops = bams$region
Site = bams$site
Dend = ma %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
DData = Dend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
DData$segments$yend2 = DData$segments$yend
for(i in 1:nrow(DData$segments)) {
if (DData$segments$yend2[i] == 0) {
DData$segments$yend2[i] = (DData$segments$y[i] - 0.01)}}
DendPoints = DData$labels
DendPoints$region = Pops[order.dendrogram(Dend)]
DendPoints$site=Site[order.dendrogram(Dend)]
rownames(DendPoints) = DendPoints$label
DendPoints$region = as.factor(DendPoints$region)
DendPoints$site = as.factor(DendPoints$site)
# Making points at the leaves to place symbols for regions
point = as.vector(NA)
for(i in 1:nrow(DData$segments)) {
if (DData$segments$yend[i] == 0) {
point[i] = DData$segments$y[i] - 0.01
} else {
point[i] = NA}}
DendPoints$y = point[!is.na(point)]
DendPoints$site = factor(DendPoints$site,levels(DendPoints$site)[c(3, 10, 4, 2, 12, 8, 11, 9, 5, 7, 6, 1, 13)])
DendPoints$region = factor(DendPoints$region,levels(DendPoints$region)[c(2, 3, 4, 5, 1)])
DendA = ggplot() +
geom_segment(data = segment(DData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = DendPoints, aes(x = x, y = y, fill = site, shape = region), size = 4, stroke = 0.25) +
# scale_fill_brewer(palette = "Dark2", name = "Site") +
scale_fill_manual(values = Pal, name= "Site")+
scale_shape_manual(values = c(21, 22, 23, 24, 25), name = "Region")+
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
theme_classic()
Dend = DendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
Dend
ggsave("../../figures/ofav/Dend.pdf", plot = Dend, height = 8, width = 24, units = "in", dpi = 300)
ggsave("../../figures/ofav/Dend.png", plot = Dend, height = 8, width = 24, units = "in", dpi = 300)
pcangsd = read.csv("../../data/ofav/ofavMetaData.csv")[-c(11,14,22,32:34,54,59,72),] %>% dplyr::select("sample" = sampleID, "region" = region, "site" = site)
site_order <- c(
"Emerald Reef",
"Rainbow Reef",
"Fisher Island",
"Coral City Camera",
"Star Island",
"MacArthur North",
"South Canyon",
"Pillars",
"FIU Biscayne Bay",
"Graceland",
"FTL4",
"BC1",
"T328"
)
site_factor <- factor(pcangsd$site, levels = site_order)
ord <- order(site_factor)
pcangsd <- pcangsd[ord, ]
pcangsd$regionsite = as.factor(paste(pcangsd$region, pcangsd$site, sep = " "))
pcangsd$regionsite = factor(pcangsd$regionsite, levels(pcangsd$regionsite)[c(4, 5, 7, 6, 9, 8, 12, 11, 13, 10, 2, 1, 3)])
pcangsd$site = factor(pcangsd$site)
pcangsd$site = factor(pcangsd$site, levels(pcangsd$site)[c(3, 10, 4, 2, 12, 8, 11, 9, 5, 7, 6, 1, 13)])
pcangsd$region = factor(pcangsd$region)
pcangsd$region = factor(pcangsd$region, levels(pcangsd$region)[c(2, 3, 4, 5, 1)])
cov = read.table("../../data/ofav/pcangsd/ofavPcangsd.cov") %>% as.matrix()
cov <- cov[ord, ord]
pcAdmix = read.table("../../data/ofav/structure_selector/K=3/MajorCluster/CLUMPP.files/ClumppIndFile.output") %>% dplyr::select(V6, V7, V8)
pcAdmix %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 56.8283 27.8111 9.3603
pcAdmix = pcAdmix %>% rename("cluster1" = "V6", "cluster2" = "V7", "cluster3" = "V8") %>%dplyr::select(order(colnames(.)))
pcaEig = eigen(cov)
ofavPcaVar = pcaEig$values/sum(pcaEig$values)*100
head(ofavPcaVar)
## [1] 14.444720 13.778512 2.667811 1.638213 1.180293 1.056884
pcangsd$PC1 = pcaEig$vectors[,1]
pcangsd$PC2 = pcaEig$vectors[,2]
pcangsd$PC3 = pcaEig$vectors[,3]
pcangsd$PC4 = pcaEig$vectors[,4]
pcangsdClust = pcAdmix %>% mutate(cluster = ifelse(cluster1 < 0.75 & cluster2 < 0.75 & cluster3 < 0.75, "NA", ifelse(cluster1 >=0.75, 1, ifelse(cluster2 >= 0.75, 2, ifelse(cluster3 >= 0.75, 3, 0)))))
# pcangsdClust$clusterX = as.vector(d_clust$classification)
pcangsd = pcangsd %>% mutate(pcangsdClust)
pcangsd$cluster = as.factor(pcangsd$cluster)
levels(pcangsd$cluster) = c("Blue", "Orange", "Purple", "Admixed")
bamsClusters = pcangsd %>% dplyr::select(sample, cluster) %>% dplyr::arrange(sample)
bamsSamples = read.delim("../../data/ofav/ANGSD/bamsNoClones", header = FALSE)
bamsClusters$sample = bamsSamples$V1
# bamsClusters = as.data.frame(bamsClusters)
write.table(x = bamsClusters, file = "../../data/ofav/ANGSD/bamsClusters", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)
# scp bamsClusters to HPC
pcangsd = merge(pcangsd, aggregate(cbind(mean.x = PC1, mean.y = PC2)~regionsite, pcangsd, mean), by="regionsite")
adonis2(pcangsd[,c(9:11)]~region*site, data = pcangsd, method = "euclidean", by = "terms")
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = pcangsd[, c(9:11)] ~ region * site, data = pcangsd, method = "euclidean", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## region 4 11.266 0.26687 9.1937 0.001 ***
## site 8 6.136 0.14534 2.5035 0.004 **
## Residual 81 24.815 0.58779
## Total 93 42.218 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pcangsd %>% group_by(region,cluster) %>% summarise(n = n())
## `summarise()` has grouped output by 'region'. You can override using the `.groups`
## argument.
## # A tibble: 12 × 3
## # Groups: region [5]
## region cluster n
## <fct> <fct> <int>
## 1 Miami reef Blue 23
## 2 Miami reef Orange 1
## 3 Miami reef Admixed 3
## 4 Miami urban Blue 20
## 5 Miami urban Orange 27
## 6 Miami urban Purple 3
## 7 Miami urban Admixed 2
## 8 North Miami reef Blue 3
## 9 North Miami urban Purple 1
## 10 Broward reef Blue 8
## 11 Broward reef Orange 1
## 12 Broward reef Admixed 2
Plot PCA
pcaTheme = theme(axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "none",
legend.title = element_text(size = 8),
legend.text = element_text(size = 8),
legend.key.size = unit(5, "pt"),
panel.border = element_rect(color = "black", size = 1),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
pcaPlot12SA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = site, shape = region, color = site), stroke = 0, size = 2.5, alpha = 0.5, show.legend = FALSE) +
geom_point(data = pcangsd, aes(x = mean.x, y = mean.y, fill = site, shape = region), color = "black", size = 2.75, alpha = 1, stroke = 0.25) +
scale_shape_manual(values = c(21, 22, 23, 24, 25), name = "Region") +
scale_fill_manual(values = Pal, name = "Site") +
scale_color_manual(values = Pal, name = "Site") +
labs(x = paste0("PC 1 (", format(round(ofavPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(ofavPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = guide_legend(override.aes = list(size = 2, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 2, fill = Pal, alpha = NA), order = 1, ncol = 1)) +
theme_bw()
pcaPlot12S = pcaPlot12SA +
pcaTheme +
theme(legend.position = c(0.1, 0.67))
pcaPlot12S
pcaPlot12LA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = cluster, shape = region), color = "black", size = 2, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(21, 22, 23, 24, 25), name = "region Zone") +
scale_fill_manual(values = kColPal, name = "Lineage") +
labs(x = paste0("PC 1 (", format(round(ofavPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(ofavPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 2, fill = kColPal, alpha = NA), order = 1, ncol = 1))+
theme_bw()
pcaPlot12L = pcaPlot12LA +
pcaTheme +
theme(legend.position = c(0.85,0.15))
pcaPlot23LA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = pcangsd, aes(x = PC3, y = PC2, fill = cluster, shape = region), color = "black", size = 2, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(21, 22, 23, 24, 25), name = "region Zone") +
scale_fill_manual(values = kColPal, name = "Lineage") +
labs(x = paste0("PC 3 (", format(round(ofavPcaVar[3], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(ofavPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = guide_legend(override.aes = list(size = 2, stroke = 0.5, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 2, fill = kColPal, alpha = NA), order = 1, ncol = 1, byrow = TRUE))+
theme_bw()
pcaPlot23L = pcaPlot23LA +
pcaTheme
Put all plots together
pcaPlots = ((pcaPlot12S + theme(axis.title.y = element_text(margin = ggplot2::margin(r = -20, unit = "pt")))) | pcaPlot12L | pcaPlot23L) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
panel.background = element_rect(fill = "white"),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
pcaPlots
Measuring with global weighted FST calculated from SFS First prepare and sort FST for plotting
pop.order = c("Blue", "Orange", "Purple")
# reads in fst
fstMa1 <- read.delim("../../data/ofav/SFS/ofavKFst.out") %>% dplyr::select(-fst) %>% df_to_pw_mat(., "pop1", "pop2", "weightedFst")
fstMa1
## Blue Orange Purple
## Blue 0.000000 0.120398 0.788588
## Orange 0.120398 0.000000 0.704045
## Purple 0.788588 0.704045 0.000000
fstMa = fstMa1
upperTriangle(fstMa, byrow = TRUE) <- lowerTriangle(fstMa)
fstMa <- fstMa[,pop.order] %>%
.[pop.order,]
fstMa[upper.tri(fstMa)] <- NA
fstMa <- as.data.frame(fstMa)
# rearrange and reformat matrix
fstMa$Pop = factor(row.names(fstMa), levels = unique(pop.order))
# melt matrix data (turn from matrix into long dataframe)
fst = melt(fstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = FALSE)
fst$Fst = round(fst$Fst, 3)
fst$site = fst$Pop
fst$site = factor(gsub("\\n.*", "", fst$site))
fst$site = factor(fst$site, levels = levels(fst$site)[c(1, 2, 3)])
fst$site2 = fst$Pop2
fst$site2 = factor(gsub("\\n.*", "", fst$site2))
fst$site2 = factor(fst$site2, levels = levels(fst$site2)[c(1, 2, 3)])
fst$Pop2 = factor(fst$Pop2, levels = levels(fst$Pop2)[c(3, 2, 1)])
Construct a heatmap of FST values
fstHeatmapA = ggplot(data = fst %>% filter(Fst != 0), aes(Pop, Pop2, fill = as.numeric(as.character(Fst)))) +
geom_tile(color = "white") +
geom_segment(data = fst, aes(x = 0.475, xend = 0.25, y = Pop2, yend = Pop2, color = site2), size = 21.25) + #colored boxes for Y-axis labels
geom_segment(data = fst, aes(x = Pop, xend = Pop, y = 0.2, yend = 0.475, color = site), size = 41) + #colored boxes for X-axis labels
scale_color_manual(values = kColPal, guide = NULL) +
scale_fill_gradient(low = "white", high = "#EA526F", limit = c(0, 0.79), space = "Lab", name = expression(paste(italic("F")[ST])), na.value = NA, guide = "colourbar") +
geom_text(data = fst %>% filter(Fst != 0), aes(Pop, Pop2, label = Fst), color = "black", size = 3.5, fontface = "bold") +
guides(fill = guide_colorbar(barwidth = 7.5, barheight = 0.75, title.position = "top", title.hjust = 0.5, direction = "horizontal", ticks.colour = "black", frame.colour = "black")) +
scale_y_discrete(position = "left", limits = rev(levels(fst$Pop2))) +
scale_x_discrete(limits = levels(fst$Pop2)[c(1:3)]) +
coord_cartesian(xlim = c(1, 4), ylim = c(1, 4), clip = "off") +
theme_minimal()
fstHeatmap = fstHeatmapA + theme(
axis.text.x = element_text(vjust = 3.5, size = 10, hjust = 0.5, color = "black"),
axis.text.y = element_text(size = 10, color = "black", angle = 90, hjust = 0.5, vjust = -1.5),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
axis.ticks = element_blank(),
legend.title = element_text(size = 8, color = "black"),
legend.text = element_text(size = 8, color = "black"),
legend.position = c(0.6, 0.9),
plot.background = element_blank(),
panel.background = element_blank(),
)
fstHeatmap
Making stairway plot of effective population sizes of each lineage throughout time
bl = read.table("../../data/ofav/SFS/ofavBlue.final.summary", header = TRUE) %>% mutate("Lineage" = "Blue")
or = read.table("../../data/ofav/SFS/ofavOrange.final.summary", header = TRUE) %>% mutate("Lineage" = "Orange")
pr = read.table("../../data/ofav/SFS/ofavPurple.final.summary", header = TRUE) %>% mutate("Lineage" = "Purple")
swData = rbind(bl, or, pr)
swData$Lineage = factor(swData$Lineage)
swData$Lineage = factor(swData$Lineage, levels = levels(swData$Lineage)[c(1,2,3)])
Constuct plot
swPlotA = ggplot(data = swData, aes(x = year, y = Ne_median, ymin = Ne_12.5., ymax = Ne_87.5., color = Lineage, fill = Lineage)) +
geom_ribbon(color = NA, aes(alpha = Lineage)) +
# geom_line(size = 0.6) +
geom_line(linewidth = 1.15) +
scale_fill_manual(values = kColPal[c(1:3)]) +
scale_color_manual(values = kColPal[c(1:3)]) +
scale_alpha_manual(values = c(0.25, 0.25, 0.35)) +
scale_x_reverse(name = "KYA", limits = c(0,5.25e5), breaks = c(1e5,2e5,3e5,4e5,5e5), labels = c("100","200", "300", "400", "500")) +
scale_y_continuous(name = bquote(italic(N[e])~"(x10"^"3"*")"), limits = c(0,3.5e5), breaks = c(1e5,2e5,3e5, 4e5), labels = c("100","200", "300","400"))+
coord_cartesian(xlim = c(5.25e5, 0), expand = FALSE) +
theme_bw()
swPlot = swPlotA + theme(
axis.title = element_text(color = "black", size = 12),
axis.text = element_text(color = "black", size = 10),
legend.key.size = unit(0.3, 'cm'),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(color = "black", size = 12),
legend.position = "none",
# legend.position = c(0.85, 0.82),
plot.background = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(size = 1),
panel.grid = element_blank()
)
swPlot
popData = read.csv("../../data/ofav/ofavMetaData.csv")[-c(11,14,22,32:34,54,59,72),] %>% dplyr::select("sample" = sampleID, "region" = region, "site" = site) # Reads in population data
popData$a = c(0:93)
popData$site = factor(popData$site)
popData$site = factor(popData$site, levels = levels(popData$site)[c(3, 10, 4, 2, 12, 8, 11, 9, 5, 7, 6, 1, 13)])
popData$region = factor(popData$region)
popData$region = factor(popData$region, levels = levels(popData$region)[c(2, 3, 4, 5, 1)])
# sampleData = fknmsSites[-c(66,68,164,166,209,211),] %>% group_by(site, depthZone)%>% summarise(depthZone = (first(depthZone)), depthRange = paste(min(depthM), "--", max(depthM), sep = ""), meanDepth = round(mean(depthM),1), n = n())%>% droplevels() %>% as.data.frame()
#
# sampleTab = sampleData
# colnames(sampleTab) = c("Site", "Depth zone", "Sampling \ndepth (m)", "Average \ndepth (m)", "n")
#
# sampleTab$Site
# finalTabSite = c("Upper Keys", "", "Lower Keys","", "Tortugas Bank", "", "Riley's Hump", "")
#
# sampleTab$Site = finalTabSite
#
hetAll = read.table("../../data/ofav/SFS/ofavHet")
colnames(hetAll) = c("sample", "He")
hetAll$sample <- popData$sample
ofavBreed = read.delim("../../data/ofav/SFS/ofavF.indF", header = FALSE)
ofavRelate = read.delim("../../data/ofav/SFS/ofavFiltRelate")
ofavRelate2 = ofavRelate %>% group_by(a, b) %>% dplyr::select("Rab" = rab, "theta" = theta)
## Adding missing grouping variables: `a`, `b`
ofavRelate2 = ofavRelate2 %>% left_join(popData, by = "a") %>% left_join(popData, by = c("b" = "a"), suffix = c(".a", ".b"))
ofavRelate2$regionsite.a = paste(ofavRelate2$region.a, ofavRelate2$site.a, sep = " ")
ofavRelate2$regionsite.b = paste(ofavRelate2$region.b, ofavRelate2$site.b, sep = " ")
ofavRelate2 = ofavRelate2 %>% left_join((pcangsd %>%dplyr::select(sample, cluster)) , by = c("sample.a" = "sample")) %>% left_join((pcangsd %>%dplyr::select(sample, cluster)) , by = c("sample.b" = "sample"))
ofavRelate = ofavRelate2 %>% filter(cluster.x != "Admixed",cluster.x == cluster.y) %>% rename(region = region.a, site = site.a, cluster = cluster.x)
ofavRelateMean = ofavRelate %>% group_by(region, site) %>% dplyr::summarize(N = n(), meanRab = mean(Rab), seRab = sd(Rab)/sqrt(N), meanTheta = mean(theta), seTheta = sd(theta)/sqrt(N)) %>% dplyr::select(-N)
## `summarise()` has grouped output by 'region'. You can override using the `.groups`
## argument.
het = left_join(popData, hetAll, by = "sample") %>% mutate("inbreed" = ofavBreed$V1) %>% left_join((pcangsd %>% dplyr::select(sample, cluster)) , by = "sample") %>% dplyr::select(-a)
hetStats = het %>% group_by(cluster) %>% summarise(N = n(), meanAll = mean(He), sdAll = sd(He), seAll = sd(He)/sqrt(N), meanInbreed = mean(inbreed), sdInbreed = sd(inbreed), seInbreed = sd(inbreed)/sqrt(N))
hetStats
## # A tibble: 4 × 8
## cluster N meanAll sdAll seAll meanInbreed sdInbreed seInbreed
## <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Blue 54 0.00111 0.000469 0.0000638 0.0828 0.0455 0.00620
## 2 Orange 29 0.00189 0.00113 0.000210 0.183 0.102 0.0189
## 3 Purple 4 0.00115 0.000311 0.000155 0.487 0.0990 0.0495
## 4 Admixed 7 0.00566 0.00207 0.000783 0.258 0.325 0.123
Heterozygosity across all RAD loci by lineage
leveneTest(lm(He ~ cluster, data = subset(het, subset = pcangsd$cluster!="Admixed")))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 6.7088 0.0004142 ***
## 83
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# p = 0.0004142
hetAov = welch_anova_test(He ~ cluster, data = subset(het, subset = het$cluster!="Admixed"))
# p = 0.021
hF = hetAov$statistic[[1]]
hetPH = games_howell_test(He ~ cluster, data = subset(het, subset = het$cluster!="Admixed"), conf.level = 0.95) %>% as.data.frame()
# He Blue Orange 0.00078199234 0.0002437016 0.00132028307 0.003 **
# He Blue Purple 0.00003888889 -0.0005539790 0.00063175673 0.971 ns
# He Orange Purple -0.00074310345 -0.0014110593 -0.00007514763 0.028 *
hetLetters = data.frame(x = factor(c("Blue", "Orange", "Purple")), y = c(0.0065, 0.0065, 0.0065), grp = c("a", "b", "a"))
lab_str <- sprintf("italic(F) == %.2f ~ ', ' ~ italic(p) == %.3f", hF, 0.021)
hetPlotKA = ggplot(data = het %>% filter(cluster != "Admixed"), aes(x = cluster, y = He)) +
geom_beeswarm(aes(fill = cluster), shape = 21, size = 2, cex = 0.75, alpha = 0.5) +
geom_violin(aes(fill = cluster, group = cluster), adjust = 1, linewidth = 0, color = "black", alpha = 0.35, width = 0.9, trim = F, scale = "width") +
geom_violin(aes(fill = cluster, group = cluster), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, width = 0.9, trim = F, fill = NA, scale = "width") +
geom_boxplot(aes(fill = cluster, group = cluster), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.6, alpha = 0.5) + xlab("Lineage") +
geom_text(data = hetLetters, aes(x = x, y = y, label = grp), size = 4) +
annotate(
geom = "text",
x = 1,
y = 0,
label = lab_str,
parse = TRUE,
size = 3
) +
scale_fill_discrete(type = kColPal, name = "Lineage") +
xlab("Lineage") +
ylab("Heterozygosity") +
# coord_cartesian(expand = TRUE, xlim = c(0.85, 4)) +
theme_bw()
hetPlotK = hetPlotKA + theme(
axis.title = element_text(color = "black", size = 12),
axis.text = element_text(color = "black", size = 10),
legend.position = "none",
legend.key.size = unit(0.3, 'cm'),
panel.border = element_rect(color = "black", size = 1),
panel.background = element_blank(),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
hetPlotK
leveneTest(lm(inbreed ~ cluster, data = subset(het, subset = pcangsd$cluster!="Admixed")))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 9.8701 0.00001231 ***
## 83
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ibAov = welch_anova_test(inbreed ~ cluster, data = subset(het, subset = het$cluster!="Admixed"))
iF = ibAov$statistic[[1]]
ibPH = games_howell_test(inbreed ~ cluster, data = subset(het, subset = het$cluster!="Admixed"), conf.level = 0.95) %>% as.data.frame()
inbreedLetters = data.frame(x = factor(c("Blue", "Orange", "Purple")), y = c(0.8, 0.8, 0.8), grp = c("a", "b", "c"))
lab_str <- sprintf("italic(F) == %.2f ~ ', ' ~ italic(p) < 0.001", hF)
inbreedingPlot = ggplot(data = het %>% filter(cluster!="Admixed"), aes(x = cluster, y = inbreed)) +
geom_beeswarm(aes(fill = cluster), shape = 21, size = 2, cex = 1.5, alpha = 0.5) +
geom_violin(aes(fill = cluster), adjust = 1, linewidth = 0, color = "black", alpha = 0.35, width = 0.9, trim = F, scale = "width") +
geom_violin(adjust = 1, linewidth = 0.4, color = "black", alpha = 1, width = 0.9, trim = F, fill = NA, scale = "width") +
geom_boxplot(aes(fill = cluster),width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.6, alpha = 0.5) +
geom_text(data = inbreedLetters, aes(x = x, y = y, label = grp), size = 4) +
annotate(
geom = "text",
x = 1,
y = -0.1,
label = lab_str,
parse = TRUE,
size = 3
) +
xlab("Lineage") +
ylab(bquote(~"Inbreeding coefficient ("*italic(F)*")")) +
scale_fill_manual(values = kColPal) +
scale_color_manual(values = kColPal) +
# scale_y_continuous(breaks=seq(0, 0.8, by = .05)) +
# coord_cartesian(expand = TRUE, xlim = c(0.78, 4)) +
theme_bw() +
theme(legend.position = "none",
axis.text = element_text(size = 10, color = "black"),
axis.title = element_text(size = 12, color = "black"),
panel.border = element_rect(color = "black", size = 1),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
inbreedingPlot
# mean depth for lineages
subset(pcangsd, subset = pcangsd$cluster!="Admixed") %>% group_by(cluster)
## # A tibble: 87 × 14
## # Groups: cluster [3]
## regionsite sample region site PC1 PC2 PC3 PC4 cluster1 cluster2
## <fct> <chr> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Broward ree… urban… Browa… BC1 0.0382 -0.0648 -0.00177 2.27e-2 0.824 0
## 2 Broward ree… urban… Browa… BC1 0.0772 -0.00884 0.00115 -1.89e-4 1 0
## 3 Broward ree… urban… Browa… BC1 0.0467 -0.0790 -0.0182 7.02e-3 0.834 0
## 4 Broward ree… urban… Browa… BC1 -0.124 0.0728 0.170 -1.43e-1 0.0297 0.927
## 5 Broward ree… urban… Browa… BC1 0.0823 -0.00593 0.00422 7.40e-3 1 0
## 6 Broward ree… urban… Browa… BC1 0.0823 -0.00559 -0.00945 -1.11e-2 1 0
## 7 Broward ree… urban… Browa… BC1 0.0787 -0.00200 0.00767 -1.01e-2 1 0
## 8 Broward ree… urban… Browa… FTL4 0.0792 -0.00104 -0.00227 1.43e-3 1 0
## 9 Broward ree… urban… Browa… T328 0.0821 -0.00447 -0.00247 9.89e-4 1 0
## 10 Miami reef … nmfs_… Miami… Emer… 0.0822 -0.00462 0.00174 -1.43e-2 1 0
## # ℹ 77 more rows
## # ℹ 4 more variables: cluster3 <dbl>, cluster <fct>, mean.x <dbl>, mean.y <dbl>
npgList = list(read_tsv("../../data/ofav/SFS/blue.thetas.idx.pestPG") %>% mutate(lineage = "Blue"),
read_tsv("../../data/ofav/SFS/orange.thetas.idx.pestPG") %>% mutate(lineage = "Orange"),
read_tsv("../../data/ofav/SFS/purple.thetas.idx.pestPG") %>% mutate(lineage = "Purple"))
## Rows: 21 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 21 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 21 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): #(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinSt...
## dbl (12): WinCenter, tW, tP, tF, tH, tL, Tajima, fuf, fud, fayh, zeng, nSites
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
piAll = purrr::reduce(npgList, rbind) %>%
group_by(lineage) %>%
mutate(tPps = tP/nSites)
# dplyr::summarize(tPps = mean(tPps))
piAll$lineage = as.factor(piAll$lineage)
piAll$lineage = factor(piAll$lineage, levels(piAll$lineage)[c(1, 2, 3)])
lmpi = lm(tPps~lineage, data=piAll)
summary(lmpi)
##
## Call:
## lm(formula = tPps ~ lineage, data = piAll)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0039311 -0.0003734 0.0000106 0.0004420 0.0036105
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0016831 0.0002481 6.783 0.00000000586 ***
## lineageOrange 0.0008065 0.0003509 2.298 0.0251 *
## lineagePurple 0.0022480 0.0003509 6.406 0.00000002553 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001137 on 60 degrees of freedom
## Multiple R-squared: 0.4125, Adjusted R-squared: 0.3929
## F-statistic: 21.06 on 2 and 60 DF, p-value: 0.0000001175
r2 = round(summary(lmpi)$r.squared, 3)
F = round(summary(lmpi)$fstatistic[1], 2)
nuclDivLetters = data.frame(x = factor(c("Blue", "Orange", "Purple")), y = c(0.8, 0.8, 0.8), grp = c("a", "b", "c"))
lab_str <- sprintf("italic(F) == %.2f ~ ', ' ~ italic(p) < 0.001", hF)
nuclDivPlot = ggplot(piAll, aes(x = lineage, y = tPps)) +
geom_smooth(se = F, color = 'black', method='lm', linewidth = 0.75) +
geom_point(aes(fill = lineage),shape = 21, size = 3) +
scale_color_manual(values = kColPal) +
scale_fill_manual(values = kColPal) +
labs(x='Lineage', y = bquote("Nucleotide diversity ("*pi*")"), shape = 'Lineage') +
annotate(
geom = "text",
x = 1,
y = 0,
label = lab_str,
parse = TRUE,
size = 3
) +
theme_bw() +
theme(axis.title.y = element_text(color = "black", size = 12),
axis.title.x = element_text(color = "black", size = 12),
axis.text = element_text(color = "black", size = 10),
legend.position = "none",
legend.key.size = unit(0.3, 'cm'),
panel.border = element_rect(color = "black", size = 1),
panel.background = element_blank(),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
nuclDivPlot
## Ignoring unknown labels:
## • shape : "Lineage"
## `geom_smooth()` using formula = 'y ~ x'
piAll$Ne = (piAll$tPps)/(4*2e-8)
piAll
## # A tibble: 63 × 17
## # Groups: lineage [3]
## #(indexStart,indexStop)(…¹ Chr WinCenter tW tP tF tH tL Tajima fuf
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (0,7071)(623754,39391467)… ofav… 19695733 17.3 6.36 0 0 0 -2.06 0.647
## 2 (0,5052)(1162434,26811926… ofav… 13405963 19.7 10.4 0 0 0 -1.54 0.941
## 3 (0,3470)(418234,22529479)… ofav… 11264739 12.0 3.81 0 0 0 -2.18 0.537
## 4 (0,2918)(140756,23578884)… ofav… 11789442 10.6 5.25 0 0 0 -1.60 0.821
## 5 (0,2889)(103770,21853617)… ofav… 10926808 9.50 3.22 0 0 0 -2.08 0.552
## 6 (0,3554)(918984,17765947)… ofav… 8882973 11.7 6.23 0 0 0 -1.49 0.894
## 7 (0,1782)(314924,15825186)… ofav… 7912593 7.52 2.67 0 0 0 -2.00 0.554
## 8 (0,1733)(445733,12850409)… ofav… 6425204 5.39 3.33 0 0 0 -1.15 0.899
## 9 (0,1506)(181072,11768574)… ofav… 5884287 4.70 1.92 0 0 0 -1.75 0.576
## 10 (0,1835)(176724,9888138)(… ofav… 4944069 5.43 2.38 0 0 0 -1.69 0.639
## # ℹ 53 more rows
## # ℹ abbreviated name:
## # ¹​`#(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinStop)`
## # ℹ 7 more variables: fud <dbl>, fayh <dbl>, zeng <dbl>, nSites <dbl>, lineage <fct>,
## # tPps <dbl>, Ne <dbl>
subset(pcangsd, subset = pcangsd$cluster!="Admixed") %>% group_by(cluster)
## # A tibble: 87 × 14
## # Groups: cluster [3]
## regionsite sample region site PC1 PC2 PC3 PC4 cluster1 cluster2
## <fct> <chr> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Broward ree… urban… Browa… BC1 0.0382 -0.0648 -0.00177 2.27e-2 0.824 0
## 2 Broward ree… urban… Browa… BC1 0.0772 -0.00884 0.00115 -1.89e-4 1 0
## 3 Broward ree… urban… Browa… BC1 0.0467 -0.0790 -0.0182 7.02e-3 0.834 0
## 4 Broward ree… urban… Browa… BC1 -0.124 0.0728 0.170 -1.43e-1 0.0297 0.927
## 5 Broward ree… urban… Browa… BC1 0.0823 -0.00593 0.00422 7.40e-3 1 0
## 6 Broward ree… urban… Browa… BC1 0.0823 -0.00559 -0.00945 -1.11e-2 1 0
## 7 Broward ree… urban… Browa… BC1 0.0787 -0.00200 0.00767 -1.01e-2 1 0
## 8 Broward ree… urban… Browa… FTL4 0.0792 -0.00104 -0.00227 1.43e-3 1 0
## 9 Broward ree… urban… Browa… T328 0.0821 -0.00447 -0.00247 9.89e-4 1 0
## 10 Miami reef … nmfs_… Miami… Emer… 0.0822 -0.00462 0.00174 -1.43e-2 1 0
## # ℹ 77 more rows
## # ℹ 4 more variables: cluster3 <dbl>, cluster <fct>, mean.x <dbl>, mean.y <dbl>
lineagePlots = (pcaPlot12L | hetPlotK | inbreedingPlot) / ( swPlot | fstHeatmap) +
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(size = 14))
# lineagePlots
ggsave("../../figures/ofav/SFS.png", plot = lineagePlots, height = 7, width = 12, units = "in", dpi = 300)
# ggsave("../../figures/ofav/SFS.svg", plot = lineagePlots, height = 7, width = 12, units = "in", dpi = 300)
# Isolation by distance
library(geosphere)
#Get the geographic distances in km
coords = read.csv("../../data/ofav/ofavMetaData.csv")[-c(11,14,22,32:34,54,59,72),] %>%
dplyr::select(longitude, latitude)
dGeo = as.dist((distm(coords, fun = distGeo)/1000), diag = TRUE)
snpDist = as.dist(read.table("../../data/ofav/SFS/ofavFiltSnps.ibsMat"), diag = TRUE)
# Test IBD
set.seed(694)
snpIBD = mantel.randtest(dGeo, snpDist, nrepet = 9999)
snpIBD
## Monte-Carlo test
## Call: mantel.randtest(m1 = dGeo, m2 = snpDist, nrepet = 9999)
##
## Observation: -0.08826195
##
## Based on 9999 replicates
## Simulated p-value: 0.989
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -1.95365759347 -0.00006996177 0.00203779904
snpGenDist = melt(as.matrix(snpDist), varnames = c("row", "col"), value.name = "dist")
snpGenDist = snpGenDist[snpGenDist$row != snpGenDist$col,]
geo = melt(as.matrix(dGeo), varnames = c("row", "col"), value.name = "geo")
geo = geo[geo$row != geo$col,]
snpMantelDF = data.frame(cbind(snpGenDist$dist, geo$geo))
colnames(snpMantelDF) = c("dist", "geo")
snpMantelA = ggplot(data = snpMantelDF, aes(x = geo, y = dist)) +
scale_fill_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
stat_density_2d(aes(fill = stat(density)), n = 300, contour = FALSE, geom = "raster") +
geom_smooth(method = lm, col = "black", fill = "gray40", fullrange = TRUE) +
geom_point(shape = 21, fill = "gray40", alpha = 0.25) +
# scale_x_continuous(limits = c(0,300), expand = c(0,0)) +
# scale_y_continuous(limits = c(0.25,0.5), breaks = seq(0.25,0.5, by = 0.05), expand = c(0,0)) +
annotate("label", x = 40, y = 0.05,
label = paste("r = ", round(snpIBD$obs, 3), "; p = ", snpIBD$pvalue),
size = 4, alpha = 0.6) +
labs(x = "Geographic distance (km)", y = expression(paste("Genetic distance "))) +
ggtitle("Isolation by Distance (IBD)") +
theme_bw()
snpMantel = snpMantelA + theme(
axis.title.x = element_blank(),
axis.text.x = element_text(size = 12, color = "black"),
axis.ticks.x = element_line(color = "black"),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black"),
axis.text.y = element_text(size = 12, color = "black"),
axis.ticks.y = element_line(color = "black"),
axis.line.y = element_blank(),
panel.border = element_rect(size = 1.2, color = "black"),
plot.margin = margin(0.2,0.5,0.1,0.1, unit = "cm"),
legend.position = "none")
snpMantel
## `geom_smooth()` using formula = 'y ~ x'
ggsave("../../figures/ofav/Mantel.png", plot = snpMantel, height = 4, width = 6, units = "in", dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'